In [1]:
import pandas as pd
import plotly.express as px

Configuration variables¶

In [2]:
granularity = 'second' # you can change to 'minute'
bucket_column = f"{granularity}_bucket"
# These filters are used to limit the data when granularity is 'second', otherwise the plot might crash. Feel free to change them.
from_timestamp = pd.to_datetime('2025-03-28 00:00:00.000 +0100')
to_timestamp = pd.to_datetime('2025-03-29 00:00:00.000 +0100')

Read the data from the CSV files¶

In [3]:
df = pd.read_csv(f"../data/water_consumption_by_{granularity}.csv", parse_dates=[bucket_column])
df.sort_values(bucket_column, inplace=True)

# Filter the data if granularity is 'second'
if granularity == 'second':
    df = df[(df[bucket_column] >= from_timestamp) & (df[bucket_column] <= to_timestamp)]

df.head()
Out[3]:
second_bucket pulse_count liters inserted_at
103070 2025-03-28 00:00:00+01:00 50.0 0.083112 2025-04-03 20:23:14.110 +0200
103071 2025-03-28 00:00:01+01:00 50.0 0.083112 2025-04-03 20:23:14.110 +0200
103072 2025-03-28 00:00:02+01:00 50.0 0.083112 2025-04-03 20:23:14.110 +0200
103073 2025-03-28 00:00:03+01:00 50.0 0.083112 2025-04-03 20:23:14.110 +0200
103074 2025-03-28 00:00:04+01:00 50.0 0.083112 2025-04-03 20:23:14.110 +0200
In [4]:
df_events = pd.read_csv("../data/event_labels.csv", parse_dates=["start_timestamp", "end_timestamp"])

# Filter the data if granularity is 'second'
if granularity == 'second':
    df_events = df_events[(df_events["end_timestamp"] >= from_timestamp) & (df_events["start_timestamp"] <= to_timestamp)]

df_events.head(20)
Out[4]:
category tag start_timestamp end_timestamp
21 Ducha #Hijo 2025-03-27 23:57:48.768000+01:00 2025-03-28 00:04:35.987000+01:00
22 Grifo NaN 2025-03-28 00:06:34.087000+01:00 2025-03-28 00:06:56.241000+01:00
23 Grifo NaN 2025-03-28 00:11:46.847000+01:00 2025-03-28 00:12:06.577000+01:00
24 Cisterna NaN 2025-03-28 03:02:21.701000+01:00 2025-03-28 03:04:23.373000+01:00
25 Cisterna #Papá 2025-03-28 06:01:59.649000+01:00 2025-03-28 06:03:30.249000+01:00
26 Ducha #Mamá 2025-03-28 06:07:29.603000+01:00 2025-03-28 06:11:45.796000+01:00
27 Ducha #Papá 2025-03-28 07:17:57.855000+01:00 2025-03-28 07:21:21.430000+01:00
28 Cisterna #Papá 2025-03-28 06:49:47.776000+01:00 2025-03-28 06:50:46.829000+01:00
29 Lavaplatos NaN 2025-03-28 07:34:28.333000+01:00 2025-03-28 08:14:30.014000+01:00
30 Lavaplatos NaN 2025-03-28 11:36:14.134000+01:00 2025-03-28 12:24:04.908000+01:00
31 Cisterna #Papá 2025-03-28 18:09:14.701000+01:00 2025-03-28 18:09:36.386000+01:00
32 Cisterna #Mamá 2025-03-28 18:43:52.148000+01:00 2025-03-28 18:44:13.374000+01:00

Plot the water consumption time series¶

In [8]:
fig = px.line(
    df,
    x=bucket_column, 
    y="liters",
    title=f"Household Water Consumption (liters per {granularity})",
    labels={bucket_column: "Tiempo", "liters": "Litros"}
)

fig.update_layout(
    xaxis_title="Time",
    yaxis_title=f"Liters per {granularity}",
    height=600,
    width=600*2.5
)

fig.show()

Add the event labels¶

In [9]:
label_y_positions = [1.0, 0.9, 0.8]
label_index = 0
for _, row in df_events.iterrows():
    y_position = label_y_positions[label_index % len(label_y_positions)]
    label_index += 1
    fig.add_vrect(
        x0=row["start_timestamp"], x1=row["end_timestamp"],
        fillcolor="red", opacity=0.2, line_width=0,
        annotation_text=row["category"],
        annotation_position="top left",
        annotation_yref="paper",
        annotation_y=y_position
    )

fig.show()
In [ ]: